{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "Python 3.7.0 64-bit ('py37': conda)",
   "display_name": "Python 3.7.0 64-bit ('py37': conda)",
   "metadata": {
    "interpreter": {
     "hash": "d1d7cbd7ea310cd54c62fa939877dbcd600804f23a57e3701f31ba0ad7f22c97"
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "tensor([[1., 1.],\n        [1., 1.]], requires_grad=True)\ntensor([[3., 3.],\n        [3., 3.]], grad_fn=<AddBackward0>)\ntensor([[27., 27.],\n        [27., 27.]], grad_fn=<MulBackward0>)\ntensor(27., grad_fn=<MeanBackward0>)\n"
     ]
    }
   ],
   "source": [
    "x = torch.ones(2, 2, requires_grad=True)\n",
    "y = x + 2\n",
    "\n",
    "z = y * y * 3\n",
    "out = z.mean()\n",
    "\n",
    "print(x)\n",
    "print(y)\n",
    "print(z)\n",
    "print(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "out.backward()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "tensor([[4.5000, 4.5000],\n        [4.5000, 4.5000]])\nNone\n"
     ]
    }
   ],
   "source": [
    "print(x.grad)\n",
    "print(y.grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "99 1013.6261596679688\n199 15.620402336120605\n299 0.3377649188041687\n399 0.008025076240301132\n499 0.0004183468990959227\n"
     ]
    }
   ],
   "source": [
    "dtype = torch.float\n",
    "device = torch.device(\"cpu\") # 使用CPU\n",
    "# device = torch.device(\"cuda:0\") # 如果使用GPU，请打开注释\n",
    "\n",
    "# N: batch size\n",
    "# D_in: 输入维度\n",
    "# H: 隐藏层\n",
    "# D_out: 输出维度 \n",
    "N, D_in, H, D_out = 64, 1000, 100, 10\n",
    "\n",
    "# 初始化随机数x, y\n",
    "# x, y用来模拟机器学习的输入和输出\n",
    "x = torch.randn(N, D_in, device=device, dtype=dtype)\n",
    "y = torch.randn(N, D_out, device=device, dtype=dtype)\n",
    "\n",
    "# 初始化模型的参数w1和w2\n",
    "# 均设置为 requires_grad=True\n",
    "# PyTorch会跟踪w1和w2上的计算，帮我们自动求导\n",
    "w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\n",
    "w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n",
    "\n",
    "learning_rate = 1e-6\n",
    "for t in range(500):\n",
    "    # 前向传播过程：\n",
    "    # h1 = x * w1\n",
    "    # y = h1 * w2\n",
    "    y_pred = x.mm(w1).clamp(min=0).mm(w2)\n",
    "\n",
    "    # 计算损失函数loss\n",
    "    # loss是误差的平方和\n",
    "    loss = (y_pred - y).pow(2).sum()\n",
    "    if t % 100 == 99:\n",
    "        print(t, loss.item())\n",
    "\n",
    "    # 反向传播过程：\n",
    "    # PyTorch会对设置了requires_grad=True的Tensor自动求导，本例中是w1和w2\n",
    "    # 执行完backward()后，w1.grad 和 w2.grad 里存储着对于loss的梯度\n",
    "    loss.backward()\n",
    "\n",
    "    # 根据梯度，更新参数w1和w2\n",
    "    with torch.no_grad():\n",
    "        w1 -= learning_rate * w1.grad\n",
    "        w2 -= learning_rate * w2.grad\n",
    "\n",
    "        # 将 w1.grad 和 w2.grad 中的梯度设为零\n",
    "        # PyTorch的backward()方法计算梯度会默认将本次计算的梯度与.grad中已有的梯度加和\n",
    "        # 必须在反向传播前先清零\n",
    "        w1.grad.zero_()\n",
    "        w2.grad.zero_()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}